This portion won't change between versions of `pmdarima`. This dataset was submitted by a user in Issue #12 and showed very slow performance on the `CHTest`. Therefore, it's effective for use in benchmarking.
In [1]:
import pandas as pd
# Read the item sales CSV file into a DataFrame.
X = pd.read_csv('item_sales_daily.csv.gz')
# Pull the 'sales' column out as its underlying array; `y` is the series
# handed to the benchmark calls in the later cells.
y = X['sales'].values
# Show the first few rows as this cell's output.
X.head()
Out[1]:
In [4]:
import pmdarima as pm
import time
from functools import wraps
def timed(func):
    """Decorate ``func`` so that every call prints how long it took.

    Parameters
    ----------
    func : callable
        The function to time. Its name and docstring are preserved on the
        returned wrapper via ``functools.wraps``.

    Returns
    -------
    wrapper : callable
        A function that forwards all positional and keyword arguments to
        ``func``, prints ``"Complete in <seconds> seconds"`` (three decimal
        places) after ``func`` returns, and then returns ``func``'s result
        unchanged. If ``func`` raises, the exception propagates and nothing
        is printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump if the system clock is
        # adjusted, which would corrupt a benchmark reading.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        print("Complete in %.3f seconds" % (time.perf_counter() - start))
        return res
    return wrapper
@timed
def benchmark(x, test, m=365, max_D=5):
    """Run ``pm.arima.nsdiffs`` on ``x`` and report the pmdarima version.

    Because the function is wrapped with ``@timed``, the elapsed time of the
    call is printed after it returns.

    Parameters
    ----------
    x : array-like
        The series passed straight through to ``pm.arima.nsdiffs``.

    test : str
        Forwarded as the ``test`` argument of ``nsdiffs`` (the notebook uses
        ``"ch"`` and ``"ocsb"``).

    m : int, optional (default=365)
        Forwarded as the ``m`` argument of ``nsdiffs``. The default of 365
        is used because the data is daily.

    max_D : int, optional (default=5)
        Forwarded as the ``max_D`` argument of ``nsdiffs``.

    Returns
    -------
    res
        Whatever ``pm.arima.nsdiffs`` returns for the given arguments.
    """
    res = pm.arima.nsdiffs(x, m=m, max_D=max_D, test=test)
    # Print the library version so results from different pmdarima releases
    # can be told apart.
    print("Version: %s" % pm.__version__)
    return res
In [16]:
# Time nsdiffs on the sales series with test="ch" (presumably the CHTest
# that the introduction describes as slow on this dataset).
benchmark(y, "ch")
In [4]:
# NOTE(review): this is the same call as the preceding cell, and the
# execution counts are out of order; presumably a re-run kept for
# comparison -- confirm it is still needed.
benchmark(y, "ch")
Version 1.2.0 added the `OCSBTest`, which is orders of magnitude faster than the `CHTest`.
In [5]:
# Time nsdiffs on the same series with test="ocsb", for comparison with the
# test="ch" timings.
benchmark(y, "ocsb")
Out[5]:
In [ ]: